#!/usr/bin/env python3
import argparse, os, glob, pandas as pd

# Candidate column names, in priority order, that find_col() tries
# (case-insensitively) when looking for the gauge column; main() renames the
# match to "gauge".
GAUGE_ALIASES = ["gauge","gauge_group","group","rep_gauge","g"]
# Candidate names for the sigma column of the fundamental and adjoint CSVs.
# main() consults them only when the canonical "sigma_fund" / "sigma_adj"
# column is not already present, and renames the match to that canonical name.
SIGMA_ALIASES_FUND = ["sigma_fund","sigma","sigma_c","string_tension"]
SIGMA_ALIASES_ADJ  = ["sigma_adj","sigma","string_tension"]

# Preferred merge keys. main() merges on the subset of these that appears
# (compared case-insensitively) in BOTH input files, in this order.
PREF_KEYS = ["b","k","n0","L","gauge"]

def find_col(df, candidates, rename_to=None):
    """Locate the first candidate column in *df*, ignoring case.

    Candidates are tried in order. For each one, every column whose label
    matches case-insensitively is considered; an exact spelling match
    (``rename_to`` first, then the candidate itself) wins over a match that
    differs only in case, and otherwise the left-most matching column is used.

    Args:
        df: DataFrame to search. It is modified in place when a rename occurs.
        candidates: Column names to look for, highest priority first.
        rename_to: Optional canonical name. When given and the matched column
            is spelled differently, that column is renamed to ``rename_to``.

    Returns:
        The label of the matched column after any rename, or ``None`` when no
        candidate matches.
    """
    # Group the real labels by lowercase spelling, preserving column order so
    # that "first match" is well defined when labels differ only in case.
    by_lower = {}
    for col in df.columns:
        by_lower.setdefault(str(col).lower(), []).append(col)

    for cand in candidates:
        matches = by_lower.get(cand.lower())
        if not matches:
            continue
        # Prefer an exact spelling: renaming e.g. "Gauge" onto an existing
        # "gauge" would leave the frame with two identically named columns.
        if rename_to in matches:
            real = rename_to
        elif cand in matches:
            real = cand
        else:
            real = matches[0]
        if rename_to and real != rename_to:
            df.rename(columns={real: rename_to}, inplace=True)
            return rename_to
        return real
    return None

def pick_csv(path_or_file):
    """Resolve *path_or_file* to a single CSV path.

    An existing file is returned unchanged. Otherwise the argument is treated
    as a directory and the best ``*.csv`` directly inside it is chosen: the
    one whose file name contains the most of the keywords below, with the
    larger file size breaking ties.

    Raises:
        FileNotFoundError: No ``*.csv`` entry was found under the path.
    """
    if os.path.isfile(path_or_file):
        return path_or_file

    pattern = os.path.join(path_or_file, "*.csv")
    csv_paths = glob.glob(pattern)
    if not csv_paths:
        raise FileNotFoundError(f"No CSV files under {path_or_file}")

    keywords = ("lattice", "string", "tension", "summary", "fund")

    def rank(csv_path):
        # (keyword hits in the lowercased base name, size in bytes)
        lowered = os.path.basename(csv_path).lower()
        hits = sum(1 for word in keywords if word in lowered)
        return (hits, os.path.getsize(csv_path))

    # max() returns the first highest-ranked path in glob order.
    return max(csv_paths, key=rank)

def main():
    """Merge adjoint and fundamental sigma tables and write their ratio.

    Command-line arguments:
        --adjoint      CSV file holding the adjoint sigma values.
        --fundamental  CSV file, or a directory from which pick_csv() selects
                       one, holding the fundamental sigma values.
        --out          Path of the CSV to write; missing parent directories
                       are created.

    Both tables are normalised (gauge column renamed to "gauge", sigma columns
    renamed to "sigma_adj" / "sigma_fund"), inner-joined on the PREF_KEYS that
    both contain, and extended with a "ratio_adj_over_fund" column.

    Raises:
        KeyError: A sigma column cannot be identified, or the two files share
            none of the preferred merge keys.
        FileNotFoundError: --fundamental is a directory without CSV files.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--adjoint", required=True)
    ap.add_argument("--fundamental", required=True, help="CSV file OR directory")
    ap.add_argument("--out", required=True)
    args = ap.parse_args()

    adj = pd.read_csv(args.adjoint)
    # pick_csv() returns an existing file unchanged, so it covers both the
    # "file" and the "directory" form of --fundamental.
    fund_path = pick_csv(args.fundamental)
    fun = pd.read_csv(fund_path)

    # Normalize the gauge column name where one exists. If either file lacks
    # it, "gauge" simply will not be among the common merge keys below.
    find_col(adj, GAUGE_ALIASES, rename_to="gauge")
    find_col(fun, GAUGE_ALIASES, rename_to="gauge")

    # Normalize sigma columns
    if "sigma_adj" not in adj.columns:
        col = find_col(adj, SIGMA_ALIASES_ADJ)
        if not col:
            raise KeyError(f"Could not find adjoint sigma column in {args.adjoint}")
        adj.rename(columns={col: "sigma_adj"}, inplace=True)
    if "sigma_fund" not in fun.columns:
        col = find_col(fun, SIGMA_ALIASES_FUND)
        if not col:
            raise KeyError(f"Could not find fundamental sigma column in {fund_path}")
        fun.rename(columns={col: "sigma_fund"}, inplace=True)

    # Build merge keys = preferred keys present (case-insensitively) in BOTH
    # dataframes. The first column with a given lowercase spelling wins.
    adj_names = {}
    for col in adj.columns:
        adj_names.setdefault(col.lower(), col)
    fun_names = {}
    for col in fun.columns:
        fun_names.setdefault(col.lower(), col)

    keys = []
    respell = {}
    for pref in PREF_KEYS:
        lowered = pref.lower()
        if lowered not in adj_names or lowered not in fun_names:
            continue
        adj_col = adj_names[lowered]
        keys.append(adj_col)
        # The merge itself is case-sensitive: give the fundamental frame the
        # adjoint spelling of the key (e.g. "l" -> "L") so the column
        # selection and the merge below cannot fail on a mere case mismatch.
        if adj_col not in fun.columns:
            respell[fun_names[lowered]] = adj_col

    if not keys:
        raise KeyError(f"No common merge keys found between files. "
                       f"Adj columns: {list(adj.columns)}, Fund columns: {list(fun.columns)}")

    if respell:
        fun = fun.rename(columns=respell)

    # Keep only keys + needed measure
    fun_sub = fun[[*keys, "sigma_fund"]].copy()
    merged = adj.merge(fun_sub, on=keys, how="inner")

    merged["ratio_adj_over_fund"] = merged["sigma_adj"] / merged["sigma_fund"]
    # dirname is "" for a bare file name; fall back to the current directory.
    os.makedirs(os.path.dirname(args.out) or ".", exist_ok=True)
    merged.to_csv(args.out, index=False)

    print(f"[ok] merged on keys {keys}")
    print(f"[ok] fundamental file: {fund_path}")
    print(f"[ok] wrote {args.out} with {len(merged)} rows")

# Run the merge only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
